
* Project root: taken from the first positional argument of this do-file.
global root_dir = "`1'"

* Run the shared configuration in this do-file's own context.
* NOTE(review): presumably this is where log_dir, dataset_dir and final_dir
* are defined - confirm in config.do.
include "$root_dir/code/config/config.do"

* Open a named log for this script. The path is quoted so that a log_dir
* containing spaces still resolves. With capture noisily, a failure to open
* the log is shown but does not abort the run.
cap noi log using "${log_dir}/nace_industry.log", replace name(dat)

* Positional arguments 2-5 are optional overrides, stored in globals
* arg1-arg4. A caller skips a slot by passing the literal placeholder
* ___EMPTY___, which is mapped to an empty string here.
forvalues slot = 1/4 {
    * Slot k is read from positional argument k + 1.
    local pos = `slot' + 1
    if "``pos''" == "___EMPTY___" {
        global arg`slot' ""
    }
    else {
        global arg`slot' "``pos''"
    }
}

* Apply each override only when a value was supplied; otherwise the global
* keeps whatever value it already had.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* The weight type is echoed whether or not it was overridden.
display "${wtype}"
* Build the industry lookup: one row per NACE division, together with the
* code and description of its parent section. The whole pipeline runs under
* capture noisily so that an error is displayed but control still reaches
* the status report and the log close below.
capture noi {

    * NACE codes. Last accessed August 2024. https://github.com/jnsprnw/nace-codes/blob/master/codes.csv
    * File paths are quoted so that directories containing spaces still work.
    import delimited "${dataset_dir}/import/codes.csv", clear

    * --- Pass 1: division codes and their descriptions ---
    * Drop every row that is neither a section nor a division.
    drop if section == "" & division == .
    * Keep the divisions but not the top categories (sections).
    keep if section == ""
    drop section group class isicrev4
    ren division code
    ren activity description
    tempfile divisions
    save `divisions', replace

    * --- Pass 2: attach the parent section to every division ---
    import delimited "${dataset_dir}/import/codes.csv", clear

    drop group class isicrev4

    * Keep sections and divisions.
    keep if section != "" | division != .
    * Carry the section letter down to the division rows beneath it.
    * This relies on the rows still being in file order.
    replace section = section[_n-1] if missing(section)
    * Section rows have no division; their activity text is the parent
    * description, which is carried down in the same way.
    gen description_parent = activity if division == .
    replace description_parent = description_parent[_n-1] if missing(description_parent)
    * Section rows have served their purpose; only divisions remain.
    drop if division == .
    drop activity
    ren division code

    * mmerge is a user-written command and must be installed.
    mmerge code using `divisions'
    ren section code_parent
    order code description code_parent description_parent
    * Spelled out in full so the drop does not depend on variable abbreviation.
    drop _merge

    * --- Label and code clean-up ---
    replace description = "Travel agency, tour operator and other reservation service and related activities" if description == "Travel agency, tour operator reservation service and related activities"
    * Fix the one odd section code: any value longer than two characters
    * becomes T (the threshold of 2 tolerates a trailing white space).
    replace code_parent = "T" if strlen(code_parent) > 2

    * Capitalize parent descriptions.
    replace description_parent = upper(description_parent)
    * Newer versions of the source have a different (truncated) description;
    * restore the full text. The match string includes its trailing spaces.
    replace description_parent = "ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS- AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN USE" if description_parent == "ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS- AND SERVICES-PRODUCING  "

    export delimited using "${final_dir}/industries.csv", replace

}
* _rc holds the return code of the capture block above; nothing may be
* placed between the closing brace and this check.
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

* Close the named log; capture keeps this silent if the log never opened.
cap log close dat